In this report, the code and results are summarised for the tasks in Homework 3.
Firstly, we load the consumption data and eliminate 2016-03-27 due to its missing hour (the daylight-saving transition). After that, we look at a general summary of the data.
# Packages for reporting, fast data handling, and the penalized models.
library(kableExtra)
library(data.table)
library(glmnet)
library(CVXR)
library(tidyverse)

# NOTE(review): absolute local path — adjust when reproducing elsewhere.
data_path <- "/home/burak/Desktop/Lectures/IE582/HW3/Data/TurkiyeCons.csv"
data <- fread(data_path)
setnames(data, c("Date", "Hour", "Consumption"))

# Consumption arrives in Turkish number format ("1.234,56"): strip the
# thousands separator, then turn the decimal comma into a point.
data$Consumption <- gsub(".", "", data$Consumption, fixed = TRUE)
data$Consumption <- as.numeric(gsub(",", ".", data$Consumption, fixed = TRUE))

# Parse dates and keep only the leading two digits of the hour label.
data_clean <- data %>%
  transmute(
    Date = lubridate::dmy(Date),
    Hour = as.numeric(substr(Hour, 1, 2)),
    Consumption
  )
summary(data_clean)
## Date Hour Consumption
## Min. :2016-01-01 Min. : 0.00 Min. : 0
## 1st Qu.:2017-03-24 1st Qu.: 5.75 1st Qu.:28835
## Median :2018-06-16 Median :11.50 Median :32929
## Mean :2018-06-16 Mean :11.50 Mean :32759
## 3rd Qu.:2019-09-08 3rd Qu.:17.25 3rd Qu.:36350
## Max. :2020-11-30 Max. :23.00 Max. :47062
# Rows where consumption was recorded as zero.
filter(data_clean, Consumption == 0)
## Date Hour Consumption
## 1 2016-03-27 2 0
# Dates that do not have all 24 distinct hourly records.
data_clean %>%
  group_by(Date) %>%
  summarise(n = n_distinct(Hour)) %>%
  ungroup() %>%
  filter(n != 24)
## # A tibble: 1 x 2
## Date n
## <date> <int>
## 1 2016-03-27 23
# 2016-03-27 is the daylight-saving transition day: it has only 23
# distinct hours and a spurious zero reading. Relabel that day's rows
# as hours 0..23 and set the whole day's consumption to NA so it is
# treated as missing rather than as a true zero.
setDT(data_clean)
data_clean[Date=="2016-03-27",Hour:=c(0:23)]
data_clean[Date=="2016-03-27",Consumption:=NA]
summary(data_clean)
## Date Hour Consumption
## Min. :2016-01-01 Min. : 0.00 Min. :15333
## 1st Qu.:2017-03-24 1st Qu.: 5.75 1st Qu.:28840
## Median :2018-06-16 Median :11.50 Median :32932
## Mean :2018-06-16 Mean :11.50 Mean :32763
## 3rd Qu.:2019-09-08 3rd Qu.:17.25 3rd Qu.:36352
## Max. :2020-11-30 Max. :23.00 Max. :47062
## NA's :24
# Hourly consumption over the full horizon.
data_clean %>%
  ggplot() +
  geom_line(aes(
    x = lubridate::as_datetime(Date) + lubridate::hours(Hour),
    y = Consumption
  )) +
  theme_bw() +
  labs(x = "DateTime")
In this part we analyze the results of the naive predictions using lag48 and lag168. For all tasks, we use the period up to 2020-10-31 for training and the rest as the test period.
# Naive benchmarks: predict each hour with consumption from 48 and 168
# hours earlier.
test_start <- lubridate::as_date("2020-11-01")

naive_pred <- data_clean %>%
  arrange(Date, Hour) %>%
  mutate(
    naive_lag48  = lag(Consumption, 48),
    naive_lag168 = lag(Consumption, 168)
  ) %>%
  filter(Date >= test_start)

# Long format: one row per (Date, Hour, Model).
naive_final <- melt(naive_pred, 1:3,
                    variable.name = "Model", value.name = "Prediction")

# Daily MAPE per model.
naive_mape <- naive_final %>%
  group_by(Date, Model) %>%
  summarise(Mape = mean(abs(Prediction - Consumption) / Consumption)) %>%
  ungroup()

# Distribution of daily MAPE over the test window.
naive_mape_general <- naive_mape %>%
  group_by(Model) %>%
  summarise(
    TestStart = min(Date), TestEnd = max(Date),
    NDay = n_distinct(Date),
    Min_Mape = min(Mape), Q25_Mape = quantile(Mape, 0.25),
    Median_Mape = median(Mape), Avg_Mape = mean(Mape),
    Q75_Mape = quantile(Mape, 0.75), Max_Mape = max(Mape)
  ) %>%
  ungroup()

p1 <- ggplot(naive_final) +
  geom_line(aes(x = lubridate::as_datetime(Date) + lubridate::hours(Hour),
                y = Consumption, col = "Actual")) +
  geom_line(aes(x = lubridate::as_datetime(Date) + lubridate::hours(Hour),
                y = Prediction, col = Model)) +
  theme_bw() + labs(x = "DateTime")
p2 <- ggplot(naive_mape) +
  geom_line(aes(x = Date, y = Mape * 100, col = Model)) +
  theme_bw() + labs(x = "Date", y = "MAPE (%)")
gridExtra::grid.arrange(p1, p2, nrow = 2)

naive_mape_general %>%
  kbl() %>%
  kable_classic(full_width = FALSE, html_font = "Cambria")
| Model | TestStart | TestEnd | NDay | Min_Mape | Q25_Mape | Median_Mape | Avg_Mape | Q75_Mape | Max_Mape |
|---|---|---|---|---|---|---|---|---|---|
| naive_lag48 | 2020-11-01 | 2020-11-30 | 30 | 0.0074837 | 0.0261611 | 0.0586450 | 0.0778920 | 0.1453798 | 0.2038553 |
| naive_lag168 | 2020-11-01 | 2020-11-30 | 30 | 0.0112858 | 0.0186489 | 0.0292689 | 0.0345367 | 0.0374015 | 0.1234274 |
We can observe that lag168 gives better performance than lag48. This may be because of the different characteristics of the days of the week: weekends behave differently from working days.
In this part, we build linear regression model for long format including all hours.
# Long-format regression: one model across all hours, with lag-48 and
# lag-168 consumption as predictors.
lm_long_data <- data_clean %>%
  arrange(Date, Hour) %>%
  mutate(
    lag48  = lag(Consumption, 48),
    lag168 = lag(Consumption, 168)
  )

# Drop rows whose lags are undefined (first week and the NA'd DST day).
lm_long_train <- lm_long_data %>% filter(Date < test_start) %>% na.omit()
lm_long_test  <- lm_long_data %>% filter(Date >= test_start)

lm_long_fit <- lm(Consumption ~ lag48 + lag168, data = lm_long_train)
summary(lm_long_fit)
##
## Call:
## lm(formula = Consumption ~ lag48 + lag168, data = lm_long_train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -17125.7 -984.3 -1.2 1015.7 16097.4
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.570e+03 8.351e+01 18.80 <2e-16 ***
## lag48 3.091e-01 3.096e-03 99.82 <2e-16 ***
## lag168 6.430e-01 3.094e-03 207.83 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2388 on 42141 degrees of freedom
## Multiple R-squared: 0.7757, Adjusted R-squared: 0.7757
## F-statistic: 7.288e+04 on 2 and 42141 DF, p-value: < 2.2e-16
# Score the long-format regression on the test window.
lm_long_test$linear_regression_long <- predict(lm_long_fit, lm_long_test)

lm_long_final <- melt(
  lm_long_test %>% select(Date, Hour, Consumption, linear_regression_long),
  1:3, variable.name = "Model", value.name = "Prediction"
)

# Daily MAPE.
lm_long_mape <- lm_long_final %>%
  group_by(Date, Model) %>%
  summarise(Mape = mean(abs(Prediction - Consumption) / Consumption)) %>%
  ungroup()

# Summary statistics of daily MAPE.
lm_long_mape_general <- lm_long_mape %>%
  group_by(Model) %>%
  summarise(
    TestStart = min(Date), TestEnd = max(Date),
    NDay = n_distinct(Date),
    Min_Mape = min(Mape), Q25_Mape = quantile(Mape, 0.25),
    Median_Mape = median(Mape), Avg_Mape = mean(Mape),
    Q75_Mape = quantile(Mape, 0.75), Max_Mape = max(Mape)
  ) %>%
  ungroup()

p1 <- ggplot(lm_long_final) +
  geom_line(aes(x = lubridate::as_datetime(Date) + lubridate::hours(Hour),
                y = Consumption, col = "Actual")) +
  geom_line(aes(x = lubridate::as_datetime(Date) + lubridate::hours(Hour),
                y = Prediction, col = Model)) +
  theme_bw() + labs(x = "DateTime")
p2 <- ggplot(lm_long_mape) +
  geom_line(aes(x = Date, y = Mape * 100, col = Model)) +
  theme_bw() + labs(x = "Date", y = "MAPE (%)")
gridExtra::grid.arrange(p1, p2, nrow = 2)

lm_long_mape_general %>%
  kbl() %>%
  kable_classic(full_width = FALSE, html_font = "Cambria")
| Model | TestStart | TestEnd | NDay | Min_Mape | Q25_Mape | Median_Mape | Avg_Mape | Q75_Mape | Max_Mape |
|---|---|---|---|---|---|---|---|---|---|
| linear_regression_long | 2020-11-01 | 2020-11-30 | 30 | 0.0121335 | 0.024273 | 0.0406424 | 0.0412358 | 0.0551997 | 0.0889939 |
We observe that both lag48 and lag168 are significant in regression. Overall performance is between two naive methods.
In this part, because of hourly seasonality, we have built a separate linear regression for each hour and then aggregated the results for overall performance.
# Hour-specific regressions: fit one lm per hour of day on that hour's
# training rows, predict its test rows, then stack all hourly test sets.
# Collecting into a list and binding once replaces the original
# rbind-inside-a-loop pattern, which copies the accumulator every pass.
lm_hourly_test <- bind_rows(lapply(0:23, function(hour) {
  temp_train <- lm_long_data %>%
    filter(Date < test_start, Hour == hour) %>%
    na.omit()
  temp_test <- lm_long_data %>%
    filter(Date >= test_start, Hour == hour)
  temp_fit <- lm(Consumption ~ lag48 + lag168, data = temp_train)
  temp_test$linear_regression_hourly <- predict(temp_fit, temp_test)
  temp_test
}))
# Evaluate the hour-specific regressions on the test window.
lm_hourly_final <- melt(
  lm_hourly_test %>% select(Date, Hour, Consumption, linear_regression_hourly),
  1:3, variable.name = "Model", value.name = "Prediction"
)

# Daily MAPE.
lm_hourly_mape <- lm_hourly_final %>%
  group_by(Date, Model) %>%
  summarise(Mape = mean(abs(Prediction - Consumption) / Consumption)) %>%
  ungroup()

# Summary statistics of daily MAPE.
lm_hourly_mape_general <- lm_hourly_mape %>%
  group_by(Model) %>%
  summarise(
    TestStart = min(Date), TestEnd = max(Date),
    NDay = n_distinct(Date),
    Min_Mape = min(Mape), Q25_Mape = quantile(Mape, 0.25),
    Median_Mape = median(Mape), Avg_Mape = mean(Mape),
    Q75_Mape = quantile(Mape, 0.75), Max_Mape = max(Mape)
  ) %>%
  ungroup()

p1 <- ggplot(lm_hourly_final) +
  geom_line(aes(x = lubridate::as_datetime(Date) + lubridate::hours(Hour),
                y = Consumption, col = "Actual")) +
  geom_line(aes(x = lubridate::as_datetime(Date) + lubridate::hours(Hour),
                y = Prediction, col = Model)) +
  theme_bw() + labs(x = "DateTime")
p2 <- ggplot(lm_hourly_mape) +
  geom_line(aes(x = Date, y = Mape * 100, col = Model)) +
  theme_bw() + labs(x = "Date", y = "MAPE (%)")
gridExtra::grid.arrange(p1, p2, nrow = 2)

lm_hourly_mape_general %>%
  kbl() %>%
  kable_classic(full_width = FALSE, html_font = "Cambria")
| Model | TestStart | TestEnd | NDay | Min_Mape | Q25_Mape | Median_Mape | Avg_Mape | Q75_Mape | Max_Mape |
|---|---|---|---|---|---|---|---|---|---|
| linear_regression_hourly | 2020-11-01 | 2020-11-30 | 30 | 0.0095117 | 0.0252687 | 0.0443367 | 0.0424977 | 0.0541299 | 0.0850646 |
According to performance results, we can derive that modelling each hour separately does not improve performance significantly for linear regression.
In this part, we will apply lasso regression with L1 penalty by using cv.glmnet. For cv.glmnet, we will use alpha = 1 parameter to apply L1 because alpha=0 gives ridge and alpha =1 gives lasso regression. Also, we have modeled each hour separately and for each hour, we use the wide lag48 and lag168 feature set.
Also, we will analyze coefficients and lambda values of models.
# Wide design matrix: one row per date, 48 columns
# (lag48_0..lag48_23, lag168_0..lag168_23).
feature_matrix <- dcast(as.data.table(lm_long_data %>% na.omit()),
                        Date ~ Hour, value.var = c("lag48", "lag168"))
train_matrix <- feature_matrix %>% filter(Date < test_start)
test_matrix  <- feature_matrix %>% filter(Date >= test_start)
train_dates  <- train_matrix$Date
train_feature <- as.matrix(train_matrix %>% select(-Date))
test_feature  <- as.matrix(test_matrix %>% select(-Date))

# 4x2 panel layout for the per-hour CV and coefficient-path plots.
par(mfrow = c(4, 2))
full_summary_glmnet <- tibble()
glmnet_lasso_hourly_final <- tibble()
# For every hour of the day, fit a 10-fold cross-validated lasso
# (alpha = 1 => pure L1 penalty) of that hour's consumption on the full
# 48-column wide feature set, then store test predictions at both
# lambda.min and lambda.1se, plus the fitted coefficients and lambdas.
for(hour in 0:23){
# Target: training-period consumption of this hour only; rows align
# with train_feature because both are ordered by Date.
train_target = (lm_long_data%>%filter(Hour==hour&Date%in%train_dates))$Consumption
# Reset the seed each iteration so every hourly model uses identical
# CV fold assignments.
set.seed(1)
temp_fit = cv.glmnet(x = train_feature,y=train_target,nfolds = 10,alpha=1)
plot(temp_fit)
plot(temp_fit$glmnet.fit)
# Test predictions at the two standard lambda choices.
temp_pred = data.table(Date=test_matrix$Date,Hour=hour,glmnet_lasso_hourly_lambdamin=predict(temp_fit,test_feature,s=c("lambda.min"))[,1],
glmnet_lasso_hourly_lambda1se=predict(temp_fit,test_feature,s=c("lambda.1se"))[,1])
glmnet_lasso_hourly_final = rbind(glmnet_lasso_hourly_final,temp_pred)
# NOTE(review): coef() on a cv.glmnet object defaults to
# s = "lambda.1se", so the stored coefficients correspond to the
# lambda.1se model, not lambda.min.
full_summary_glmnet=full_summary_glmnet%>%
bind_rows(data.table(variable=names(as.matrix(coef(temp_fit))[,1]),coefficent=as.matrix(coef(temp_fit))[,1],
model=paste0("model_for_hour_",hour),lambda1se=temp_fit$lambda.1se,lambdamin=temp_fit$lambda.min)
)
}
# Restore the single-panel layout.
par(mfrow = c(1, 1))

# coef_order preserves the (Intercept), lag48_*, lag168_* column order;
# model_order preserves the hour-0..23 order of the models.
full_summary_glmnet <- full_summary_glmnet %>%
  group_by(model) %>%
  mutate(coef_order = row_number()) %>%
  ungroup() %>%
  group_by(variable) %>%
  mutate(model_order = row_number()) %>%
  ungroup()

# Heatmap of lambda.1se coefficients per hourly model (intercept dropped).
ggplot(filter(full_summary_glmnet, variable != "(Intercept)")) +
  geom_tile(aes(x = reorder(variable, coef_order),
                y = reorder(model, model_order),
                fill = coefficent)) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))

# Selected lambda.1se per hourly model.
ggplot(full_summary_glmnet %>%
         select(model, lambda1se, lambdamin, model_order) %>%
         unique()) +
  geom_bar(aes(x = reorder(model, model_order), y = lambda1se,
               fill = "lambda1se"),
           stat = "identity") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))

# Selected lambda.min per hourly model.
ggplot(full_summary_glmnet %>%
         select(model, lambda1se, lambdamin, model_order) %>%
         unique()) +
  geom_bar(aes(x = reorder(model, model_order), y = lambdamin,
               fill = "lambdamin"),
           stat = "identity") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
# Join the lasso predictions back to the actuals and evaluate.
glmnet_lasso_hourly_final <- melt(glmnet_lasso_hourly_final, 1:2,
                                  variable.name = "Model",
                                  value.name = "Prediction") %>%
  left_join(lm_long_data %>% select(Date, Hour, Consumption))

# Daily MAPE.
glmnet_lasso_hourly_mape <- glmnet_lasso_hourly_final %>%
  group_by(Date, Model) %>%
  summarise(Mape = mean(abs(Prediction - Consumption) / Consumption)) %>%
  ungroup()

# Summary statistics of daily MAPE.
glmnet_lasso_hourly_mape_general <- glmnet_lasso_hourly_mape %>%
  group_by(Model) %>%
  summarise(
    TestStart = min(Date), TestEnd = max(Date),
    NDay = n_distinct(Date),
    Min_Mape = min(Mape), Q25_Mape = quantile(Mape, 0.25),
    Median_Mape = median(Mape), Avg_Mape = mean(Mape),
    Q75_Mape = quantile(Mape, 0.75), Max_Mape = max(Mape)
  ) %>%
  ungroup()

p1 <- ggplot(glmnet_lasso_hourly_final) +
  geom_line(aes(x = lubridate::as_datetime(Date) + lubridate::hours(Hour),
                y = Consumption, col = "Actual")) +
  geom_line(aes(x = lubridate::as_datetime(Date) + lubridate::hours(Hour),
                y = Prediction, col = Model)) +
  theme_bw() + labs(x = "DateTime")
p2 <- ggplot(glmnet_lasso_hourly_mape) +
  geom_line(aes(x = Date, y = Mape * 100, col = Model)) +
  theme_bw() + labs(x = "Date", y = "MAPE (%)")
gridExtra::grid.arrange(p1, p2, nrow = 2)

glmnet_lasso_hourly_mape_general %>%
  kbl() %>%
  kable_classic(full_width = FALSE, html_font = "Cambria")
| Model | TestStart | TestEnd | NDay | Min_Mape | Q25_Mape | Median_Mape | Avg_Mape | Q75_Mape | Max_Mape |
|---|---|---|---|---|---|---|---|---|---|
| glmnet_lasso_hourly_lambdamin | 2020-11-01 | 2020-11-30 | 30 | 0.0079057 | 0.0153607 | 0.0187656 | 0.0243023 | 0.0255123 | 0.0801177 |
| glmnet_lasso_hourly_lambda1se | 2020-11-01 | 2020-11-30 | 30 | 0.0069945 | 0.0146829 | 0.0194586 | 0.0241893 | 0.0277244 | 0.0651066 |
We observe that:
In this part, we will apply fused penalized regression. Firstly, we will apply cross validation to select (lambda1, lambda2) values.
As the objective function, the following expression is used. The problem is solved via CVXR.
obj <- sum_squares(train_target - cbind(1,train_feature) %*% beta) / (2 * n) +
lambda2 * sum_squares(beta[-1])+ # Ridge Penalty
lambda1 * sum(abs(diff(beta)[-c(1,25)])) # Fused Lasso (L1 norm of coefficient differences)
From glmnet documentation, sum of squares is divided by 2*number_of_obs because of scaling. Also, intercept is not included in ridge penalty. So, we used sum_squares(beta[-1]) as ridge penalty. Lastly, because we have 24 + 24 feature for lag48 and lag168, we should exclude diff(beta)[-c(1,25)] where 1 is the difference with intercept and 25 is the difference between lag_168_0 - lag_48_23
Now, we apply 5-fold cross validation with lambda1 = lambda2 = c(0, 10, 20, ..., 90, 100), for hours 0, 6, 12 and 18. A more extended grid may be required, but because of computational concerns we proceeded with these settings. Cross-validation results are compared via MSE.
# 5-fold cross validation for the fused penalized regression. The
# penalties are searched on a coupled grid lambda1 = lambda2 in
# {0, 10, ..., 100}, and only hours 0, 6, 12, 18 are used to keep the
# solver run time manageable.
nfold <- 5
feature_matrix <- dcast(as.data.table(lm_long_data %>% na.omit()),
                        Date ~ Hour, value.var = c("lag48", "lag168"))
train_matrix <- feature_matrix %>% filter(Date < test_start)
test_matrix <- feature_matrix %>% filter(Date >= test_start)
train_dates <- train_matrix$Date
cv_result <- tibble()
for (fold in 1:nfold) {
  set.seed(fold)
  # NOTE(review): each fold is sampled independently, so held-out sets
  # may overlap across folds and some dates may never be held out; a
  # single shuffled partition would be cleaner. Kept as-is to preserve
  # the original CV design apart from the bug fixes below.
  out_fold_dates <- sample(train_dates, length(train_dates) / nfold)
  train_feature <- as.matrix(train_matrix %>%
                               filter(!Date %in% out_fold_dates) %>%
                               dplyr::select(-Date))
  test_feature <- as.matrix(train_matrix %>%
                              filter(Date %in% out_fold_dates) %>%
                              dplyr::select(-Date))
  for (hour in c(0, 6, 12, 18)) {
    train_target <- (lm_long_data %>%
                       filter(Hour == hour & Date %in% train_dates) %>%
                       filter(!Date %in% out_fold_dates))$Consumption
    # FIX: n must be the number of observations; length() of a matrix is
    # rows * columns, which silently mis-scaled the least-squares term.
    n <- nrow(train_feature)
    beta <- Variable(ncol(train_feature) + 1)
    for (lambda1 in seq(0, 100, 10)) {
      lambda2 <- lambda1
      # Objective: scaled least squares + ridge on non-intercept
      # coefficients + fused-lasso penalty on successive coefficient
      # differences. Differences 1 (intercept vs lag48_0) and 25
      # (lag168_0 vs lag48_23) cross unrelated blocks and are excluded.
      # FIX: the fused term must penalize |differences|; without abs()
      # sum(diff(beta)) telescopes to a linear expression and is not a
      # penalty at all.
      obj <- sum_squares(train_target - cbind(1, train_feature) %*% beta) / (2 * n) +
        lambda2 * sum_squares(beta[-1]) +           # ridge penalty
        lambda1 * sum(abs(diff(beta)[-c(1, 25)]))   # fused lasso penalty
      prob <- Problem(Minimize(obj))
      temp_result <- solve(prob)
      temp_beta <- temp_result$getValue(beta)
      temp_pred <- cbind(1, test_feature) %*% temp_beta
      temp_result <- lm_long_data %>%
        filter(Hour == hour & Date %in% out_fold_dates) %>%
        dplyr::select(Date, Hour, Consumption) %>%
        mutate(Prediction = temp_pred, lambda1 = lambda1,
               lambda2 = lambda2, fold = fold)
      cv_result <- rbind(cv_result, temp_result)
    }
  }
}
# Average out-of-fold squared error per penalty pair, best first.
cv_result_final <- cv_result %>%
  group_by(lambda1, lambda2) %>%
  summarise(MSE = mean((Consumption - Prediction)^2)) %>%
  arrange(MSE)

cv_result_final %>%
  kbl() %>%
  kable_classic(full_width = FALSE, html_font = "Cambria")
| lambda1 | lambda2 | MSE |
|---|---|---|
| 90 | 90 | 2079125 |
| 100 | 100 | 2079185 |
| 80 | 80 | 2079371 |
| 70 | 70 | 2079983 |
| 60 | 60 | 2081037 |
| 50 | 50 | 2082633 |
| 40 | 40 | 2084900 |
| 30 | 30 | 2088011 |
| 20 | 20 | 2092204 |
| 10 | 10 | 2097816 |
| 0 | 0 | 2105340 |
According to results, lambda1=lambda2=90 gives smallest MSE. Now, we proceed with these lambdas to take prediction for each hour.
# Fit the fused penalized regression per hour on the full training set,
# using the CV-selected penalties lambda1 = lambda2 = 90.
feature_matrix <- dcast(as.data.table(lm_long_data %>% na.omit()),
                        Date ~ Hour, value.var = c("lag48", "lag168"))
train_matrix <- feature_matrix %>% filter(Date < test_start)
test_matrix <- feature_matrix %>% filter(Date >= test_start)
train_dates <- train_matrix$Date
train_feature <- as.matrix(train_matrix %>% select(-Date))
test_feature <- as.matrix(test_matrix %>% select(-Date))
lambda1 <- 90
lambda2 <- 90
fused_lasso_hourly_final <- tibble()
full_summary_fused <- tibble()
for (hour in 0:23) {
  train_target <- (lm_long_data %>%
                     filter(Hour == hour & Date %in% train_dates))$Consumption
  # FIX: n is the number of observations (rows); length() of a matrix
  # returns rows * columns and mis-scaled the least-squares term.
  n <- nrow(train_feature)
  beta <- Variable(ncol(train_feature) + 1)
  # FIX: abs() added so the fused term is a true L1 penalty on
  # coefficient differences (without it the sum telescopes into a
  # linear expression). Differences 1 (intercept vs lag48_0) and 25
  # (lag168_0 vs lag48_23) cross unrelated blocks and are excluded.
  obj <- sum_squares(train_target - cbind(1, train_feature) %*% beta) / (2 * n) +
    lambda2 * sum_squares(beta[-1]) +           # ridge penalty
    lambda1 * sum(abs(diff(beta)[-c(1, 25)]))   # fused lasso penalty
  prob <- Problem(Minimize(obj))
  temp_result <- solve(prob)
  temp_beta <- temp_result$getValue(beta)
  temp_pred <- cbind(1, test_feature) %*% temp_beta
  temp_result <- data.table(Date = test_matrix$Date, Hour = hour,
                            fused_lasso_hourly = temp_pred[, 1])
  fused_lasso_hourly_final <- rbind(fused_lasso_hourly_final, temp_result)
  # FIX: the original took variable names from `temp_fit`, a leftover
  # cv.glmnet object from the earlier lasso section; build the names
  # directly from the design matrix instead.
  full_summary_fused <- full_summary_fused %>%
    bind_rows(data.table(
      variable = c("(Intercept)", colnames(train_feature)),
      coefficent = temp_beta[, 1],
      model = paste0("model_for_hour_", hour)
    ))
}
# Order coefficients/models for plotting: coef_order keeps the design
# column order, model_order keeps the hour-0..23 order.
full_summary_fused <- full_summary_fused %>%
  group_by(model) %>%
  mutate(coef_order = row_number()) %>%
  ungroup() %>%
  group_by(variable) %>%
  mutate(model_order = row_number()) %>%
  ungroup()

# Heatmap of fused-regression coefficients (intercept dropped).
ggplot(filter(full_summary_fused, variable != "(Intercept)")) +
  geom_tile(aes(x = reorder(variable, coef_order),
                y = reorder(model, model_order),
                fill = coefficent)) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
# Join the fused-regression predictions back to the actuals and evaluate.
fused_lasso_hourly_final <- melt(fused_lasso_hourly_final, 1:2,
                                 variable.name = "Model",
                                 value.name = "Prediction") %>%
  left_join(lm_long_data %>% select(Date, Hour, Consumption))

# Daily MAPE.
fused_lasso_hourly_mape <- fused_lasso_hourly_final %>%
  group_by(Date, Model) %>%
  summarise(Mape = mean(abs(Prediction - Consumption) / Consumption)) %>%
  ungroup()

# Summary statistics of daily MAPE.
fused_lasso_hourly_mape_general <- fused_lasso_hourly_mape %>%
  group_by(Model) %>%
  summarise(
    TestStart = min(Date), TestEnd = max(Date),
    NDay = n_distinct(Date),
    Min_Mape = min(Mape), Q25_Mape = quantile(Mape, 0.25),
    Median_Mape = median(Mape), Avg_Mape = mean(Mape),
    Q75_Mape = quantile(Mape, 0.75), Max_Mape = max(Mape)
  ) %>%
  ungroup()

p1 <- ggplot(fused_lasso_hourly_final) +
  geom_line(aes(x = lubridate::as_datetime(Date) + lubridate::hours(Hour),
                y = Consumption, col = "Actual")) +
  geom_line(aes(x = lubridate::as_datetime(Date) + lubridate::hours(Hour),
                y = Prediction, col = Model)) +
  theme_bw() + labs(x = "DateTime")
p2 <- ggplot(fused_lasso_hourly_mape) +
  geom_line(aes(x = Date, y = Mape * 100, col = Model)) +
  theme_bw() + labs(x = "Date", y = "MAPE (%)")
gridExtra::grid.arrange(p1, p2, nrow = 2)

fused_lasso_hourly_mape_general %>%
  kbl() %>%
  kable_classic(full_width = FALSE, html_font = "Cambria")
| Model | TestStart | TestEnd | NDay | Min_Mape | Q25_Mape | Median_Mape | Avg_Mape | Q75_Mape | Max_Mape |
|---|---|---|---|---|---|---|---|---|---|
| fused_lasso_hourly | 2020-11-01 | 2020-11-30 | 30 | 0.0083398 | 0.0158916 | 0.0187858 | 0.0242732 | 0.0251494 | 0.0769423 |
We observe that:
In this part, we compare all results.
# Side-by-side comparison of every approach on the same test window.
full_comparison <- rbind(naive_mape, lm_long_mape, lm_hourly_mape,
                         glmnet_lasso_hourly_mape, fused_lasso_hourly_mape)
full_comparison_general <- rbind(naive_mape_general, lm_long_mape_general,
                                 lm_hourly_mape_general,
                                 glmnet_lasso_hourly_mape_general,
                                 fused_lasso_hourly_mape_general)

# Distribution of daily MAPE per model.
ggplot(full_comparison) +
  geom_boxplot(aes(x = Model, y = Mape * 100, fill = Model)) +
  labs(y = "MAPE (%)") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))

full_comparison_general %>%
  kbl() %>%
  kable_classic(full_width = FALSE, html_font = "Cambria")
| Model | TestStart | TestEnd | NDay | Min_Mape | Q25_Mape | Median_Mape | Avg_Mape | Q75_Mape | Max_Mape |
|---|---|---|---|---|---|---|---|---|---|
| naive_lag48 | 2020-11-01 | 2020-11-30 | 30 | 0.0074837 | 0.0261611 | 0.0586450 | 0.0778920 | 0.1453798 | 0.2038553 |
| naive_lag168 | 2020-11-01 | 2020-11-30 | 30 | 0.0112858 | 0.0186489 | 0.0292689 | 0.0345367 | 0.0374015 | 0.1234274 |
| linear_regression_long | 2020-11-01 | 2020-11-30 | 30 | 0.0121335 | 0.0242730 | 0.0406424 | 0.0412358 | 0.0551997 | 0.0889939 |
| linear_regression_hourly | 2020-11-01 | 2020-11-30 | 30 | 0.0095117 | 0.0252687 | 0.0443367 | 0.0424977 | 0.0541299 | 0.0850646 |
| glmnet_lasso_hourly_lambdamin | 2020-11-01 | 2020-11-30 | 30 | 0.0079057 | 0.0153607 | 0.0187656 | 0.0243023 | 0.0255123 | 0.0801177 |
| glmnet_lasso_hourly_lambda1se | 2020-11-01 | 2020-11-30 | 30 | 0.0069945 | 0.0146829 | 0.0194586 | 0.0241893 | 0.0277244 | 0.0651066 |
| fused_lasso_hourly | 2020-11-01 | 2020-11-30 | 30 | 0.0083398 | 0.0158916 | 0.0187858 | 0.0242732 | 0.0251494 | 0.0769423 |